\documentclass{beamer}
%TODO check size when done
\usepackage{lmodern}
\usepackage{multimedia}
\usepackage[backend=bibtex,bibstyle=authoryear,citestyle=authortitle-ibid]{biblatex}
\usepackage{algorithm}
\usepackage{algpseudocode}
\usepackage{tabularx}
\usepackage{colortbl}

\AtBeginSection[] % Do nothing for \section*
{
\begin{frame}<beamer>
\frametitle{Outline}
\tableofcontents[currentsection]
\end{frame}
}

\setbeamertemplate{navigation symbols}{}

%\usetheme{Pittsburgh}
\title[] % (optional, only for long titles)
{Lifelong feature-based strategy planning for dynamic environments}
\author[] % (optional, for multiple authors)
{Nishant JAIN\inst{1,2} \\
\vspace{.5cm}
{\footnotesize under the supervision of}\\
\and Dr. Dizan VASQUEZ\inst{2}}
\institute[] % (optional)
{
  \inst{1}%
    Grenoble INP
    \and
  \inst{2}%
  Team CHROMA\\
  INRIA Grenoble Rh\^{o}ne-Alpes
}
\date[24 June 2015]{24 June 2015} % (optional)
%\subject{Computer Science}

\useoutertheme{infolines}
\setbeamertemplate{headline}{}
\addbibresource{report1.bib}


\begin{document}
\titlegraphic{\noindent\includegraphics[height=7ex]{logoINP.png}\hfill\raisebox{2ex}{\includegraphics[height=3.5ex]{logoUJF.jpg}}\hfill\raisebox{1ex}{\includegraphics[height=5ex]{logo_INRIA.png}}}
\frame{\titlepage}

%\section*{Outline}
%\begin{frame}
%\frametitle{Outline}
%\tableofcontents
%\end{frame}

\section{Introduction}
\begin{frame}
    \frametitle{Learning from Demonstration}
    Goal:
    \begin{itemize}
     \item Observe a person going from state A to state B
     \item Move in a similar way from state C to state D
    \end{itemize}
    \begin{figure}[h!]
       \centering
         \includegraphics[scale=.3]{imgp6f.png}
       \label{al}
     \end{figure}
    Assumption:
    \begin{itemize}
      \item Humans act as planners
      \item Behaviour dictated by a cost function
    \end{itemize}
    Inverse Reinforcement Learning (IRL) can recover this cost function
\end{frame}

\begin{frame}
 \frametitle{Test Problem}
 \framesubtitle{Behaviour A}
 \begin{center}
 \movie[width=8cm,height=6cm,poster,autostart,showcontrols=true]{}{vid1r.webm}
 \end{center}
\end{frame}

\begin{frame}
 \frametitle{Test Problem}
 \framesubtitle{Behaviour B}
 \begin{center}
 \movie[width=8cm,height=6cm,poster,autostart,showcontrols=true]{}{vid2r.webm}
 \end{center}
\end{frame}

\begin{frame}
\frametitle{The reward function \footcite{Ng00algorithmsfor}}
The reward is defined in terms of the agent's context, encoded as features $\boldsymbol{\phi} : S \to [0,1]^d$:
\begin{equation}
 R(s)=w_1\phi_1(s)+w_2\phi_2(s)+\cdots+w_d\phi_d(s)
\end{equation}
\begin{figure}[h!]
   
   \centering
     \includegraphics[scale=.3]{imgp5f.png}
   \label{fs}
 \end{figure}
\end{frame}

\begin{frame}
    \frametitle{Inverse Reinforcement Learning}
    \framesubtitle{Markov Decision Process}
    Uses the reinforcement learning framework \\
    A finite MDP is a tuple $(S, A, \{P_{sa}\}, \gamma, R)$, where
     \begin{itemize} %reduce spacing
      \setlength{\itemsep}{-2pt}
      \item $S$ is a finite set of $N$ states
      \item $A$ = $\{a_1, \cdots, a_k\}$ is a set of $k$ actions
      \item $P_{sa}(\cdot)$ are the state transition probabilities upon taking action $a$ in state $s$
      \item $\gamma \in [0,1)$ is the discount factor
      \item $R : S \to \mathbb{R}$ is the reward function
     \end{itemize}
    Goal: Determine a mapping (policy) $\pi : S \to A$\\
\end{frame}
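
\begin{frame}[fragile]
    \frametitle{Inverse Reinforcement Learning}
    \framesubtitle{Markov Decision Process: a minimal sketch}
    A minimal Python sketch of the tuple on the previous slide (a plain container; the names are illustrative, not taken from our implementation):
\begin{small}
\begin{verbatim}
from dataclasses import dataclass
import numpy as np

@dataclass
class FiniteMDP:
    n_states: int    # |S| = N
    n_actions: int   # |A| = k
    P: np.ndarray    # P[s, a, t] = P_sa(t), shape (N, k, N)
    gamma: float     # discount factor in [0, 1)
    R: np.ndarray    # R[s], reward per state, shape (N,)
\end{verbatim}
\end{small}
\end{frame}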

\begin{frame}{Common IRL approach}
\begin{itemize}
\item Demonstrated path or trajectory: $\zeta = (s_0, s_1, \ldots)$
\item Sum of features (feature count): $F_{\zeta} = \sum_{s_j \in \zeta} \boldsymbol{\phi}(s_j)$
\end{itemize}
\begin{figure}[h!]
   \centering
     \includegraphics[scale=.4]{imgp1f.png}
   \label{irl1}
 \end{figure}
\end{frame}
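
\begin{frame}[fragile]{Common IRL approach}
\framesubtitle{A minimal sketch}
A minimal Python sketch of the feature count and the linear reward defined on the previous slides (\texttt{phi} is assumed to map a state to a vector in $[0,1]^d$):
\begin{small}
\begin{verbatim}
import numpy as np

def feature_count(zeta, phi):
    """F_zeta: sum of phi(s_j) over the states of a path."""
    return np.sum([phi(s) for s in zeta], axis=0)

def reward(s, w, phi):
    """Linear reward R(s) = w_1 phi_1(s) + ... + w_d phi_d(s)."""
    return np.dot(w, phi(s))
\end{verbatim}
\end{small}
\end{frame}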


\begin{frame}{Our Contributions}
\begin{itemize}
\item Implementation of the state-of-the-art algorithms
\item Extension of these algorithms to dynamic environments
\item Extension of two approaches (Maximum Entropy IRL \& Bayesian IRL)
\item A new algorithm based on genetic algorithms
\end{itemize}
\end{frame}

\begin{frame}[shrink=20]{State of the art}
\begin{center}
  \begin{tabularx}{\textwidth}{ | p{2.8cm} | p{5.49cm} | p{5.49cm} |}
    \hline
    Algorithm & Advantages & Disadvantages \\ \hline
    Linear  & Low computational complexity & Requires complex features \\
    Programming   & & No batch definition \\ \hline
    Quadratic  & Low computational complexity & No batch definition \\
    Programming (QP)  & & Does not always converge to a solution \\ \hline
    Maximum  & Multi-objective approach & Restrictions on features \\
    Entropy (ME)  & Does not require the solution of the underlying MDP & Gradient-based approach may not be globally optimal \\ \hline
    Bayesian  & Efficient computation & Require the complete policy\\
    approaches   & & Limited to small state spaces \\ \hline
    Supervised learning approaches & Do not require the solution of the underlying MDP & Require the complete policy\\ \hline
  \end{tabularx}
\end{center}
\end{frame}

\section{Proposed Approaches}
\begin{frame}{Proposed Approaches}
\framesubtitle{IRL using genetic algorithms (GA)}
 \begin{itemize}
  \item QP often fails to find a solution in a batch setting
  \item Uses a multi-objective genetic algorithm
  \item Provides a set of candidate solutions
  \item Provides globally optimal solutions
  \item Computationally expensive {\footnotesize (sketch on the next slide)}
 \end{itemize}
\end{frame}
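
\begin{frame}[fragile]{Proposed Approaches}
\framesubtitle{GA-based IRL: a minimal sketch}
A simplified sketch of the GA loop (fitness scalarized for brevity, whereas the actual algorithm is multi-objective; \texttt{solve\_mdp}, mapping weights to an optimal path, and \texttt{feature\_count} are assumed given):
\begin{footnotesize}
\begin{verbatim}
import numpy as np

def ga_irl(F_expert, solve_mdp, feature_count, d=3,
           pop=50, gens=100, sigma=0.05, seed=0):
    rng = np.random.default_rng(seed)
    W = rng.random((pop, d))            # initial weight vectors
    for _ in range(gens):
        # fitness: how closely each w reproduces the expert's
        # feature counts (scalarized here for brevity)
        fit = [-np.abs(feature_count(solve_mdp(w)) - F_expert).sum()
               for w in W]
        parents = W[np.argsort(fit)[-pop // 2:]]   # keep best half
        children = parents + rng.normal(0, sigma, parents.shape)
        W = np.vstack([parents, np.clip(children, 0, 1)])
    return W                            # a population of candidates
\end{verbatim}
\end{footnotesize}
\end{frame}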

\begin{frame}{ME: Poor performance}
 \begin{center}
 \movie[width=8cm,height=6cm,poster,autostart,showcontrols=true]{}{vid3r.webm}
 \end{center}
\end{frame}

\begin{frame}{Proposed Approaches}
\framesubtitle{Normalized maximum entropy IRL (MEN)}
 \begin{itemize}
  \item Maximum entropy IRL requires feature counts to be of the same magnitude
  \item We compute the average sum of each feature, $\phi_{i\,\mathrm{avg}}$
  \item $\nabla L(\mathbf{w})_i \leftarrow \frac{\nabla L(\mathbf{w})_i}{\phi_{i\,\mathrm{avg}}} \quad \forall i \in \{1, 2, 3\}$ {\footnotesize (sketch on the next slide)}
 \end{itemize}
\end{frame}
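
\begin{frame}[fragile]{Proposed Approaches}
\framesubtitle{MEN normalization: a minimal sketch}
The normalization step from the previous slide as code (\texttt{grad} is the maximum entropy gradient $\nabla L(\mathbf{w})$, computed elsewhere; the names are illustrative):
\begin{small}
\begin{verbatim}
import numpy as np

def normalize_gradient(grad, phi_avg):
    """Divide each gradient component by the corresponding
    average feature count, so that features of different
    magnitudes contribute comparably."""
    return np.asarray(grad) / np.asarray(phi_avg)
\end{verbatim}
\end{small}
\end{frame}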

\begin{frame}{Proposed Approaches}
\framesubtitle{Bayesian IRL (BIRL) and Improved Bayesian IRL (BIRLI)}
\begin{itemize}
 \item Standard algorithm limited to small state spaces
 \item Modified to use $R$ as a weighted sum of features
 \item Uses a random walk on the weights
\end{itemize}
BIRLI:
\begin{itemize}
\item BIRL requires the complete policy
\item Uses a different probability measure
\item $P(\zeta|\mathbf{w}) \propto e^{-|\mathbf{w}^\top (F_{\zeta_\mathbf{w}} - F_\zeta)|}$, where $\zeta_\mathbf{w}$ is the optimal path under $\mathbf{w}$ {\footnotesize (sketch on the next slide)}
\end{itemize}
\end{frame}
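
\begin{frame}[fragile]{Proposed Approaches}
\framesubtitle{BIRLI sampling: a minimal sketch}
A minimal random-walk sketch built around the BIRLI probability measure (\texttt{solve\_mdp} and \texttt{feature\_count} are assumed given; a Metropolis acceptance rule stands in for the full sampler):
\begin{footnotesize}
\begin{verbatim}
import numpy as np

def birli(F_expert, solve_mdp, feature_count, d=3,
          iters=1000, step=0.05, seed=0):
    rng = np.random.default_rng(seed)
    def loglik(w):
        # log P(zeta|w) = -|w^T (F_{zeta_w} - F_zeta)| + const
        F_w = feature_count(solve_mdp(w))
        return -abs(w @ (F_w - F_expert))
    w = rng.random(d)
    ll = loglik(w)
    for _ in range(iters):              # random walk on the weights
        w_new = np.clip(w + rng.normal(0, step, d), 0, 1)
        ll_new = loglik(w_new)
        if np.log(rng.random()) < ll_new - ll:   # Metropolis accept
            w, ll = w_new, ll_new
    return w
\end{verbatim}
\end{footnotesize}
\end{frame}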

\section{Experiment setup}

\begin{frame}{Test Problem}
\begin{itemize}
 \item Motion is restricted to one dimension
\end{itemize}
\begin{figure}[h!]
   
   \centering
     \includegraphics[scale=.3]{img1f.png}
     \caption{Robot in green; red objects are cars to avoid; blue objects are to be collected}
   \label{gm}
 \end{figure}
\end{frame}

\begin{frame}{Test Problem}
\begin{figure}[h!]
   
   \centering
     \includegraphics[scale=.5]{img1f.png}
   \label{gmf}
 \end{figure}
\begin{itemize}
\item $\phi_1$: collision with a red object
\item $\phi_2$: movement
\item $\phi_3$: collision with a blue object
\item $R(s,a)=w_1\phi_1(s,a) + w_2\phi_2(s,a) + w_3\phi_3(s,a)$
\end{itemize}
\end{frame}

\begin{frame}{Test Problem}
\framesubtitle{Experiments}
Two sets of experiments\\
Set 1
\begin{itemize}
\item Artificial trajectories based on pre-defined weights
\item Goal: recover the correct weights
\end{itemize}
Set 2
\begin{itemize}
\item Trajectories under human control
\item Goal: reproduce similar behaviour
\end{itemize}
\end{frame}

\begin{frame}{Metrics}
\begin{itemize}
 \item Path match: percentage of states common to the two paths
 \item Policy match (on the observed path): percentage of expert-path states where the learned policy picks the expert's action
 \item Average difference: per-feature difference in feature counts between the two paths
\end{itemize}
\begin{figure}[h!]
   
   \centering
     \includegraphics[scale=.3]{imgp4f.png}
   \label{pmt}
 \end{figure}
\end{frame}
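
\begin{frame}[fragile]{Metrics}
\framesubtitle{A minimal sketch}
A minimal sketch of the first two metrics (paths as sequences of states, policies as dictionaries; the names are illustrative):
\begin{small}
\begin{verbatim}
def path_match(path_a, path_b):
    """Percentage of path_a's states also present in path_b."""
    states_b = set(path_b)
    common = sum(1 for s in path_a if s in states_b)
    return 100.0 * common / len(path_a)

def policy_match(expert_path, expert_policy, learned_policy):
    """Percentage of expert-path states where both policies
    pick the same action."""
    same = sum(1 for s in expert_path
               if learned_policy[s] == expert_policy[s])
    return 100.0 * same / len(expert_path)
\end{verbatim}
\end{small}
\end{frame}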

\begin{frame}{Experiment set 1}
 
Deterministic transitions:
 \begin{figure}[h!]
   
   \centering
     \includegraphics[scale=.3]{imgp2f.png}
   \label{daa}
 \end{figure}
Non-deterministic transitions:
 \begin{figure}[h!]
   
   \centering
     \includegraphics[scale=.3]{imgp3f.png}
   \label{dab}
 \end{figure}
\end{frame}

\begin{frame}[shrink=10]{Experiment set 1}
\framesubtitle{Results: Deterministic actions}
\begin{center}
  \begin{tabular}{ | l | c | c | c | c |}
    \hline
    Algorithm & Weights & Path Match & Policy Match & Average Difference \\ \hline
    QP & 0.9264706 & 79.88 & 96.78 & 0.0356871 \\        
       & 0.0882353 & 	   &       & 0.1260655 \\
       & 0.6911765 & 	   &       & 0.0444611 \\ \hline
    ME & 0.2099072 & 27.35 & 69.54 & 0.2191048 \\        
       & 0.7900928 & 	   &       & 0.9138373 \\
       & $1.341\times10^{-8}$ & & & 0.1451539 \\ \hline
    BIRL & 0.4368039 & 53.12 & 65.84 & 0.0451011 \\        
       & 0.0 & 	   &       & 0.4914036 \\
       & 0.5631961 & 	   &       & 0.0421774 \\ \hline 
    \cellcolor[gray]{0.8}GA & 0.5817141 & 95.84 & 99.46 & 0.0060606 \\        
    \cellcolor[gray]{0.8}       & 0.0237875 & 	   &       & 0.0286501 \\
    \cellcolor[gray]{0.8}       & 0.6006902 & 	   &       & 0.0060606 \\ \hline
    \cellcolor[gray]{0.8}\textbf{MEN} & 0.4768660 & \textbf{97.66} & \textbf{99.86} & \textbf{0.0} \\        
    \cellcolor[gray]{0.8}       & 0.0165905 & 	   &       & \textbf{0.0165289} \\
    \cellcolor[gray]{0.8}       & 0.5065435 & 	   &       & \textbf{0.0} \\ \hline
    \cellcolor[gray]{0.8}BIRLI & 0.4867780 & 94.99 & 99.06 & 0.0096970 \\        
    \cellcolor[gray]{0.8}   & 0.0248060 & 	   &       & 0.0431956 \\
    \cellcolor[gray]{0.8}   & 0.4884160 & 	   &       & 0.0096970 \\ \hline
  \end{tabular}
\end{center}
\end{frame}

\begin{frame}[shrink=10]{Experiment set 1}
\framesubtitle{Results: Non-deterministic actions}
\begin{center}
  \begin{tabular}{ | l | c | c | c | c |}
      \hline
      Algorithm & Weights & Path Match & Policy Match & Average Difference \\ \hline
      QP & 0.5817141 & 55.98 & 84.69 & 0.0178033 \\        
         & 0.0237875 & 	   &       & 0.3870907 \\
         & 0.6006902 & 	   &       & 0.0153828 \\ \hline
      ME & 0.9768493 & 63.23 & 87.79 & 0.0364202 \\        
         & $2.743\times10^{-8}$ & & & 0.1333707 \\
         & 0.0231507 & 	   &       & 0.0560314 \\ \hline
      BIRL & 0.7904742 & 71.99 & 90.02 & 0.0302867 \\        
         & 0.0 & 	   &       & 0.1718813 \\
         & 0.2095258 & 	   &       & 0.0358048 \\ \hline 
      \cellcolor[gray]{0.8}\textbf{GA} & 0.6532417 & \textbf{98.09} & \textbf{99.77} & \textbf{0.0062628} \\        
      \cellcolor[gray]{0.8}   & 0.0009902 & 	   &       & \textbf{0.0232378} \\
      \cellcolor[gray]{0.8}   & 0.6349674 & 	   &       & \textbf{0.0061540} \\ \hline
      \cellcolor[gray]{0.8}MEN & 0.6362234 & 82.70 & 93.72 & 0.0305718 \\        
      \cellcolor[gray]{0.8}   & $6.943\times10^{-21}$ & & & 0.1098317 \\
      \cellcolor[gray]{0.8}   & 0.3637766 & 	   &       & 0.0229517 \\ \hline
      \cellcolor[gray]{0.8}BIRLI & 0.4666431 & 88.25 & 95.63 & 0.0072912 \\        
      \cellcolor[gray]{0.8}   & 0.0 & 	   &       & 0.1137779 \\
      \cellcolor[gray]{0.8}   & 0.5333569 & 	   &       & 0.0078020 \\ \hline
    \end{tabular}
\end{center}
\end{frame}

\begin{frame}{Experiment set 2}
Two games:\\
\textbf{Rule set 1}
\begin{itemize}
 \setlength{\itemsep}{-2pt}
 \item Try to avoid the red objects
 \item Try to collect blue objects
\end{itemize}
\textbf{Rule set 2}
\begin{itemize}
 \setlength{\itemsep}{-2pt}
 \item Try to hit the red objects 
 \item Try to minimize movement
 \item Try to avoid the blue objects 
\end{itemize}
Transitions are assumed to be deterministic
\end{frame}

\begin{frame}[shrink=10]{Experiment set 2}
\framesubtitle{Rule set 1}
\begin{center}
  \begin{tabular}{ | l | c | c | c | c |}
    \hline
    Algorithm & Weights & Path Match & Policy Match & Average Difference \\ \hline
    QP & 0.0013509 & 39.86 & 76.57 & 0.1232874 \\        
       & 0.0006991 & 	   &       & 0.3835450 \\
       & 0.0401125 & 	   &       & 0.0740975 \\ \hline
    ME & 0.2035503 & 25.48 & 68.78 & 0.1934495 \\        
       & 0.7964433 & 	   &       & 0.9488993 \\
       & 0.0000063 & 	   &       & 0.1281234 \\ \hline
    \cellcolor[gray]{0.8}GA & 0.2666580 & \textbf{47.24} & 77.26 & 0.1027817 \\        
    \cellcolor[gray]{0.8}   & 0.0150257 & 	   &       & 0.2247087 \\
    \cellcolor[gray]{0.8}   & 0.3756208 & 	   &       & 0.0699799 \\ \hline
    \cellcolor[gray]{0.8}MEN & 0.2849231 & 43.87 & \textbf{77.46} & 0.1104534 \\        
    \cellcolor[gray]{0.8}   & 0.0658453 & 	   &       & 0.3742005 \\
    \cellcolor[gray]{0.8}   & 0.6492316 & 	   &       & 0.0540322 \\ \hline
    \cellcolor[gray]{0.8}BIRLI & 0.3928222 & 32.74  & 53.94 & 0.1173798 \\        
    \cellcolor[gray]{0.8}      & 0.0 & & & 0.5801888 \\
    \cellcolor[gray]{0.8}      & 0.6071778 & 	   &       & 0.0593870 \\ \hline
  \end{tabular}
\end{center}
\end{frame}


\begin{frame}[shrink=10]{Experiment set 2}
\framesubtitle{Rule set 2}
\begin{center}
  \begin{tabular}{ | l | c | c | c | c |}
      \hline
      Algorithm & Weights & Path Match & Policy Match & Average Difference \\ \hline
      QP & 0.12 & 34.67 & 84.55 & 0.2105596 \\        
         & 0.1371429 & 	   &       & 0.2970270 \\
         & 0.9057143 & 	   &       & 0.0053476 \\ \hline
      ME & $2.909\times10^{-17}$ & 14.43 & \textbf{85.22} & 0.5744536 \\
         & 1.0 & & & 0.4434879 \\
         & $6.377\times10^{-24}$ & & & 0.0321774 \\ \hline
      \cellcolor[gray]{0.8} GA & 0.3174619 & \textbf{35.76} & 84.54 & 0.3135418 \\        
      \cellcolor[gray]{0.8}   & 0.3135551 & 	   &       & 0.3143837 \\
      \cellcolor[gray]{0.8}   & 0.7662550 & 	   &       & 0.0053476 \\ \hline
      \cellcolor[gray]{0.8}MEN & 0.0014155 & 14.43 & \textbf{85.22} & 0.5744536 \\        
      \cellcolor[gray]{0.8}   & 0.9985845 & 	   &       & 0.4434879 \\
      \cellcolor[gray]{0.8}   & $3.452\times10^{-21}$ & & & 0.0321774 \\ \hline
      \cellcolor[gray]{0.8}BIRLI & 0.2159944 & 35.20 & 85.05 & \textbf{0.2082719} \\        
      \cellcolor[gray]{0.8}       & 0.2190940 & 	   &       & \textbf{0.2525666} \\
      \cellcolor[gray]{0.8}       & 0.5649116 & 	   &       & \textbf{0.0053476} \\ \hline
    \end{tabular}
\end{center}
\end{frame}

\section{Conclusion and future work}
\begin{frame}{Conclusion}
%TODO change
\begin{itemize}
 \item The state-of-the-art algorithms have significant limitations
 \item GA performed well consistently
 \item MEN and BIRLI outperformed their standard counterparts
 \item GA is computationally expensive
 \item BIRLI may fail in certain circumstances
\end{itemize}
\end{frame}

\begin{frame}{PhD proposal}
\framesubtitle{Planning-based prediction with inverse reinforcement learning}
\begin{itemize}
 \item Non-linear reward function
 \item Feature selection
 \item Continuous state spaces and actions \footcite{2012-cioc}
 \item Multiple agents
\end{itemize}
\begin{center}
 \movie[width=4cm,height=4cm,poster,autostart,showcontrols=true]{}{vid4r.webm}
\end{center}
\end{frame}

%\begin{frame}{PhD proposal}
%\framesubtitle{A game-theoretic approach to autonomous security patrolling using micro aerial vehicles}
%\begin{itemize}
% \item Patrolling is one of the standard ways to address security threats
% \item Defender and attacker in a partially observable stochastic game
% \item Defender chooses action/policy first
% \item Game repeated till attacker arrested
% \item Micro aerial vehicles (MAVs) used for enforcement
%\end{itemize}
%\end{frame}

%\begin{frame}{PhD proposal}
%\framesubtitle{Goals}
%\begin{itemize}
% \item Find the patrol strategy for best security performance
% \item Complexity analysis
% \item Attacking may not act optimally
% \item Developing algorithms for MAVs relying on sensor fusion
%\end{itemize}
%\end{frame}

\begin{frame}
\begin{center}
\Huge
Thank You
\end{center}
\end{frame}

\end{document}
